import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import preprocessing
from data_load import get_clean_data,normalize_1_variables,normalize_2_variables,normalize_3_variables,heatmapify
# Configure the seaborn theme in a single call.
# Every sns.set() call resets each argument it is not given back to its
# default, so the previous three consecutive calls left only font_scale=2 in
# effect and silently dropped the requested style. Passing everything at once
# applies the tick style, the colour codes and the font scale together.
# ("whitegrid" is not kept: the original immediately replaced it by "ticks".)
sns.set(style="ticks", color_codes=True, font_scale=2)
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;
# Separate notebook cell (the %%javascript cell magic above applies to its whole cell).
import matplotlib.pyplot as plt
%matplotlib inline
for i in range(10):
plt.plot(range(10))
plt.show()
# Load the quiz responses through the project's cleaning helper and take a
# first look at them: how many rows there are, which columns exist, and the
# first few records.
# NOTE(review): the meaning of the positional True flag is defined in
# data_load.get_clean_data and is not visible from this file.
data = get_clean_data('quiz_data.csv', True)
response_count = len(data)
print(f"there are {response_count} responses from people satisfied with their program")
print(data.columns)
print(data.head())
# Univariate summaries.
# For every survey variable, print a heading and draw one figure with two bar
# charts side by side: absolute frequencies (left) and percentages (right).
# This replaces 27 copy-pasted stanzas that differed only in the column name
# and the display label.

# Size of each summary figure, in inches.
SUMMARY_FIGSIZE = (14, 8)

# (display label, DataFrame column) pairs, in the order they are reported.
SUMMARY_VARIABLES = [
    ("program", "program"),
    ("happy", "happy"),
    ("problem_type", "problem_type"),
    ("creative", "creative"),
    ("industry - architecture", "architecture"),
    ("industry - technology", "technology"),
    ("industry - automotive", "automotive"),
    ("industry - business", "business"),
    ("industry - construction", "construction"),
    ("industry - health", "health"),
    ("industry - environment", "environment"),
    ("industry - manufacturing", "manufacturing"),
    ("outdoors", "outdoors"),
    ("career", "career"),
    ("group_work", "group_work"),
    ("liked_courses", "liked_courses"),
    ("disliked_courses", "disliked_courses"),
    ("programming", "programming"),
    ("join_clubs", "join_clubs"),
    ("not_clubs", "not_clubs"),
    ("liked_projects", "liked_projects"),
    ("disliked_projects", "disliked_projects"),
    ("tv_shows", "tv_shows"),
    ("alternate_degree", "alternate_degree"),
    ("expensive_equipment", "expensive_equipment"),
    ("drawing", "drawing"),
    ("essay", "essay"),
]

for label, column in SUMMARY_VARIABLES:
    print("Summary of the variable: " + label)
    # The figure size is set once here instead of being repeated in every
    # plot call; the resulting figure dimensions are the same.
    fig, axs = plt.subplots(1, 2, figsize=SUMMARY_FIGSIZE)
    fig.suptitle(label)
    data[column].value_counts().plot(kind='bar', title="Frequency", ax=axs[0])
    (data[column].value_counts(normalize=True) * 100).plot(
        kind='bar', title="Percent", ax=axs[1]
    )
# Program-vs-variable heatmaps.
# For each survey variable, pass the data through normalize_1_variables and
# heatmapify (both from data_load) and draw the result as an annotated
# heatmap whose columns are the variable's observed values.
# This replaces 25 copy-pasted stanzas that differed only in the column name,
# the title label and, in two cases, the figure size.
# NOTE(review): the DataFrame is built without an index, so the heatmap rows
# are labelled 0..n-1 rather than by program name -- presumably the rows
# follow the order of data["program"].unique(); confirm against heatmapify.

# (DataFrame column, label used in the title, figure size in inches).
HEATMAP_SPECS = [
    ("happy", "happy", (10, 10)),
    ("problem_type", "problem_type", (10, 10)),
    ("creative", "creative", (10, 10)),
    ("outdoors", "outdoors", (10, 10)),
    ("career", "career", (10, 10)),
    ("group_work", "group_work", (10, 10)),
    ("liked_courses", "liked_courses", (10, 10)),
    ("disliked_courses", "disliked_courses", (12, 12)),
    ("programming", "programming", (10, 10)),
    ("join_clubs", "join_clubs", (10, 10)),
    ("not_clubs", "not_clubs", (10, 10)),
    ("liked_projects", "liked_projects", (10, 10)),
    ("disliked_projects", "disliked_projects", (10, 10)),
    # The original title read 'program Vs.tv_shows' (missing space); it now
    # follows the same 'program Vs. <label>' pattern as every other heatmap.
    ("tv_shows", "tv_shows", (10, 10)),
    ("alternate_degree", "alternate_degree", (15, 15)),
    ("drawing", "drawing", (10, 10)),
    ("essay", "essay", (10, 10)),
    ("architecture", "industry - architecture", (10, 10)),
    ("automotive", "industry - automotive", (10, 10)),
    ("business", "industry - business", (10, 10)),
    ("construction", "industry - construction", (10, 10)),
    ("health", "industry - health", (10, 10)),
    ("environment", "industry - environment", (10, 10)),
    ("manufacturing", "industry - manufacturing", (10, 10)),
    ("technology", "industry - technology", (10, 10)),
]

for column, label, heatmap_figsize in HEATMAP_SPECS:
    plot_data = normalize_1_variables(data, "program", column)
    # Fresh lists are built for every call, exactly as before, because
    # heatmapify is opaque from here and might modify its arguments.
    plot_data = heatmapify(
        plot_data,
        "program",
        list(data["program"].unique()),
        column,
        list(data[column].unique()),
    )
    df = pd.DataFrame(plot_data, columns=list(data[column].unique()))
    fig, ax = plt.subplots(figsize=heatmap_figsize)
    # Draw on the axes just created (previously picked up implicitly as the
    # current axes).
    sns.heatmap(df, annot=True, cmap="Blues", ax=ax).set_title('program Vs. ' + label)
# Per-program summary for "mech".
# Draws one tall figure with a row per survey variable: absolute frequencies
# in the left column and percentages in the right column, restricted to the
# respondents whose program is 'mech'.
# This replaces 50 copy-pasted plot calls. The filtered rows now live in their
# own variable, so `data` keeps the full cleaned data set and the extra reload
# of the CSV that used to follow the plots is no longer needed.
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: mech")
mech_data = data[data.program == 'mech']

# Survey variables shown, one grid row each, top to bottom.
PROGRAM_SUMMARY_COLUMNS = [
    "happy",
    "problem_type",
    "creative",
    "outdoors",
    "career",
    "group_work",
    "liked_courses",
    "disliked_courses",
    "programming",
    "join_clubs",
    "not_clubs",
    "liked_projects",
    "disliked_projects",
    "tv_shows",
    "alternate_degree",
    "expensive_equipment",
    "essay",
    "architecture",
    "automotive",
    "business",
    "construction",
    "health",
    "environment",
    "manufacturing",
    "technology",
]

# The figure size is set once here instead of in every plot call.
fig, axes = plt.subplots(nrows=len(PROGRAM_SUMMARY_COLUMNS), ncols=2, figsize=(14, 8))
fig.suptitle('mech')
for axes_row, column in zip(axes, PROGRAM_SUMMARY_COLUMNS):
    mech_data[column].value_counts().plot(
        kind='bar', title=column + "Frequency", ax=axes_row[0]
    )
    (mech_data[column].value_counts(normalize=True) * 100).plot(
        kind='bar', title=column + "Percent", ax=axes_row[1]
    )

# `top` is a fraction of the figure height, so 10 stretches the grid far above
# the nominal 14x8 canvas to give the 25 rows room.
# NOTE(review): presumably this relies on the notebook's inline backend
# cropping to the drawn content -- confirm before running as a plain script.
top = 10  # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
fig.subplots_adjust(top=top, hspace=hspace)
# Per-program summary for "bmed".
# Draws one tall figure with a row per survey variable: absolute frequencies
# in the left column and percentages in the right column, restricted to the
# respondents whose program is 'bmed'.
# This replaces 50 copy-pasted plot calls. The filtered rows now live in their
# own variable, so `data` keeps the full cleaned data set and the extra reload
# of the CSV that used to follow the plots is no longer needed.
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: bmed")
bmed_data = data[data.program == 'bmed']

# Survey variables shown, one grid row each, top to bottom.
PROGRAM_SUMMARY_COLUMNS = [
    "happy",
    "problem_type",
    "creative",
    "outdoors",
    "career",
    "group_work",
    "liked_courses",
    "disliked_courses",
    "programming",
    "join_clubs",
    "not_clubs",
    "liked_projects",
    "disliked_projects",
    "tv_shows",
    "alternate_degree",
    "expensive_equipment",
    "essay",
    "architecture",
    "automotive",
    "business",
    "construction",
    "health",
    "environment",
    "manufacturing",
    "technology",
]

# The figure size is set once here instead of in every plot call.
fig, axes = plt.subplots(nrows=len(PROGRAM_SUMMARY_COLUMNS), ncols=2, figsize=(14, 8))
fig.suptitle('bmed')
for axes_row, column in zip(axes, PROGRAM_SUMMARY_COLUMNS):
    bmed_data[column].value_counts().plot(
        kind='bar', title=column + "Frequency", ax=axes_row[0]
    )
    (bmed_data[column].value_counts(normalize=True) * 100).plot(
        kind='bar', title=column + "Percent", ax=axes_row[1]
    )

# `top` is a fraction of the figure height, so 10 stretches the grid far above
# the nominal 14x8 canvas to give the 25 rows room.
# NOTE(review): presumably this relies on the notebook's inline backend
# cropping to the drawn content -- confirm before running as a plain script.
top = 10  # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
fig.subplots_adjust(top=top, hspace=hspace)
data = get_clean_data('quiz_data.csv',True)
print("Summary of the Program: swe")
data = data[data.program=='swe']
fig, axes = plt.subplots(nrows=25, ncols=2)
fig.suptitle('sft')
data['happy'].value_counts().plot(kind='bar',
figsize=(14,8),
title="happyFrequency",
ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="happyPercent",
ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
figsize=(14,8),
title="problem_typeFrequency",
ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="problem_typePercent",
ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
figsize=(14,8),
title="creativeFrequency",
ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="creativePercent",
ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
figsize=(14,8),
title="outdoorsFrequency",
ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="outdoorsPercent",
ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
figsize=(14,8),
title="careerFrequency",
ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="careerPercent",
ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
figsize=(14,8),
title="group_workFrequency",
ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="group_workPercent",
ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
figsize=(14,8),
title="liked_coursesFrequency",
ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="liked_coursesPercent",
ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
figsize=(14,8),
title="disliked_coursesFrequency",
ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="disliked_coursesPercent",
ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
figsize=(14,8),
title="programmingFrequency",
ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="programmingPercent",
ax=axes[8][1])
data['join_clubs'].value_counts().plot(kind='bar',
figsize=(14,8),
title="join_clubsFrequency",
ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="join_clubsPercent",
ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
figsize=(14,8),
title="not_clubsFrequency",
ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="not_clubsPercent",
ax=axes[10][1])
data['liked_projects'].value_counts().plot(kind='bar',
figsize=(14,8),
title="liked_projectsFrequency",
ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="liked_projectsPercent",
ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
figsize=(14,8),
title="disliked_projectsFrequency",
ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="disliked_projectsPercent",
ax=axes[12][1])
data['tv_shows'].value_counts().plot(kind='bar',
figsize=(14,8),
title="tv_showsFrequency",
ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="tv_showsPercent",
ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
figsize=(14,8),
title="alternate_degreeFrequency",
ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="alternate_degreePercent",
ax=axes[14][1])
data['expensive_equipment'].value_counts().plot(kind='bar',
figsize=(14,8),
title="expensive_equipmentFrequency",
ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="expensive_equipmentPercent",
ax=axes[15][1])
data['essay'].value_counts().plot(kind='bar',
figsize=(14,8),
title="essayFrequency",
ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="essayPercent",
ax=axes[16][1])
data['architecture'].value_counts().plot(kind='bar',
figsize=(14,8),
title="architectureFrequency",
ax=axes[17][0])
(data['architecture'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="architecturePercent",
ax=axes[17][1])
data['automotive'].value_counts().plot(kind='bar',
figsize=(14,8),
title="automotiveFrequency",
ax=axes[18][0])
(data['automotive'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="automotivePercent",
ax=axes[18][1])
data['business'].value_counts().plot(kind='bar',
figsize=(14,8),
title="businessFrequency",
ax=axes[19][0])
(data['business'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="businessPercent",
ax=axes[19][1])
data['construction'].value_counts().plot(kind='bar',
figsize=(14,8),
title="constructionFrequency",
ax=axes[20][0])
(data['construction'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="constructionPercent",
ax=axes[20][1])
data['health'].value_counts().plot(kind='bar',
figsize=(14,8),
title="healthFrequency",
ax=axes[21][0])
(data['health'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="healthPercent",
ax=axes[21][1])
data['environment'].value_counts().plot(kind='bar',
figsize=(14,8),
title="environmentFrequency",
ax=axes[22][0])
(data['environment'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="environmentPercent",
ax=axes[22][1])
data['manufacturing'].value_counts().plot(kind='bar',
figsize=(14,8),
title="manufacturingFrequency",
ax=axes[23][0])
(data['manufacturing'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="manufacturingPercent",
ax=axes[23][1])
data['technology'].value_counts().plot(kind='bar',
figsize=(14,8),
title="technologyFrequency",
ax=axes[24][0])
(data['technology'].value_counts(normalize=True) * 100).plot(kind='bar',
figsize=(14,8),
title="technologyPercent",
ax=axes[24][1])
top = 10 # the top of the subplots of the figure
hspace = 1.2 # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
# Per-program summary for 'ce'.
# For every survey column listed below, one grid row is drawn: the raw answer
# counts on the left and the percentage share on the right.
#
# The data set is loaded once; loading it twice in a row with identical
# arguments only repeats the work, because the first result is overwritten.
data = get_clean_data('quiz_data.csv',True)
program_code = 'ce'
print("Summary of the Program: " + program_code)
data = data[data.program == program_code]

summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career',
    'group_work', 'liked_courses', 'disliked_courses', 'programming',
    'join_clubs', 'not_clubs', 'liked_projects', 'disliked_projects',
    'tv_shows', 'alternate_degree', 'expensive_equipment', 'essay',
    'architecture', 'automotive', 'business', 'construction', 'health',
    'environment', 'manufacturing', 'technology',
]

fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2)
fig.suptitle(program_code)

for grid_row, column_name in enumerate(summary_columns):
    frequency = data[column_name].value_counts()
    frequency.plot(kind='bar',
                   figsize=(14, 8),
                   title=column_name + "Frequency",
                   ax=axes[grid_row][0])
    percent = data[column_name].value_counts(normalize=True) * 100
    percent.plot(kind='bar',
                 figsize=(14, 8),
                 title=column_name + "Percent",
                 ax=axes[grid_row][1])

# Layout values handed to subplots_adjust: `top` is the position of the top
# edge of the subplot grid, `hspace` the height reserved for white space
# between subplot rows.
top = 10
hspace = 1.2
plt.subplots_adjust(top=top, hspace=hspace)
# Per-program summary for 'tron'.
# For every survey column listed below, one grid row is drawn: the raw answer
# counts on the left and the percentage share on the right.
#
# The data set is loaded once; loading it twice in a row with identical
# arguments only repeats the work, because the first result is overwritten.
data = get_clean_data('quiz_data.csv',True)
program_code = 'tron'
print("Summary of the Program: " + program_code)
data = data[data.program == program_code]

summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career',
    'group_work', 'liked_courses', 'disliked_courses', 'programming',
    'join_clubs', 'not_clubs', 'liked_projects', 'disliked_projects',
    'tv_shows', 'alternate_degree', 'expensive_equipment', 'essay',
    'architecture', 'automotive', 'business', 'construction', 'health',
    'environment', 'manufacturing', 'technology',
]

fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2)
fig.suptitle(program_code)

for grid_row, column_name in enumerate(summary_columns):
    frequency = data[column_name].value_counts()
    frequency.plot(kind='bar',
                   figsize=(14, 8),
                   title=column_name + "Frequency",
                   ax=axes[grid_row][0])
    percent = data[column_name].value_counts(normalize=True) * 100
    percent.plot(kind='bar',
                 figsize=(14, 8),
                 title=column_name + "Percent",
                 ax=axes[grid_row][1])

# Layout values handed to subplots_adjust: `top` is the position of the top
# edge of the subplot grid, `hspace` the height reserved for white space
# between subplot rows.
top = 10
hspace = 1.2
plt.subplots_adjust(top=top, hspace=hspace)
# Per-program summary for 'cive'.
# For every survey column listed below, one grid row is drawn: the raw answer
# counts on the left and the percentage share on the right.
#
# The data set is loaded once; loading it twice in a row with identical
# arguments only repeats the work, because the first result is overwritten.
data = get_clean_data('quiz_data.csv',True)
program_code = 'cive'
print("Summary of the Program: " + program_code)
data = data[data.program == program_code]

summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career',
    'group_work', 'liked_courses', 'disliked_courses', 'programming',
    'join_clubs', 'not_clubs', 'liked_projects', 'disliked_projects',
    'tv_shows', 'alternate_degree', 'expensive_equipment', 'essay',
    'architecture', 'automotive', 'business', 'construction', 'health',
    'environment', 'manufacturing', 'technology',
]

fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2)
fig.suptitle(program_code)

for grid_row, column_name in enumerate(summary_columns):
    frequency = data[column_name].value_counts()
    frequency.plot(kind='bar',
                   figsize=(14, 8),
                   title=column_name + "Frequency",
                   ax=axes[grid_row][0])
    percent = data[column_name].value_counts(normalize=True) * 100
    percent.plot(kind='bar',
                 figsize=(14, 8),
                 title=column_name + "Percent",
                 ax=axes[grid_row][1])

# Layout values handed to subplots_adjust: `top` is the position of the top
# edge of the subplot grid, `hspace` the height reserved for white space
# between subplot rows.
top = 10
hspace = 1.2
plt.subplots_adjust(top=top, hspace=hspace)
# Per-program summary for 'chem'.
# For every survey column listed below, one grid row is drawn: the raw answer
# counts on the left and the percentage share on the right.
#
# The data set is loaded once; loading it twice in a row with identical
# arguments only repeats the work, because the first result is overwritten.
data = get_clean_data('quiz_data.csv',True)
program_code = 'chem'
print("Summary of the Program: " + program_code)
data = data[data.program == program_code]

summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career',
    'group_work', 'liked_courses', 'disliked_courses', 'programming',
    'join_clubs', 'not_clubs', 'liked_projects', 'disliked_projects',
    'tv_shows', 'alternate_degree', 'expensive_equipment', 'essay',
    'architecture', 'automotive', 'business', 'construction', 'health',
    'environment', 'manufacturing', 'technology',
]

fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2)
fig.suptitle(program_code)

for grid_row, column_name in enumerate(summary_columns):
    frequency = data[column_name].value_counts()
    frequency.plot(kind='bar',
                   figsize=(14, 8),
                   title=column_name + "Frequency",
                   ax=axes[grid_row][0])
    percent = data[column_name].value_counts(normalize=True) * 100
    percent.plot(kind='bar',
                 figsize=(14, 8),
                 title=column_name + "Percent",
                 ax=axes[grid_row][1])

# Layout values handed to subplots_adjust: `top` is the position of the top
# edge of the subplot grid, `hspace` the height reserved for white space
# between subplot rows.
top = 10
hspace = 1.2
plt.subplots_adjust(top=top, hspace=hspace)
# Summary plots for the 'syde' program: one row of bar charts per survey
# column, raw answer counts on the left and percentages on the right.

# Reload the responses so the program filter below is applied to the full
# set, not to a frame an earlier cell may already have filtered. One load
# is enough; the original issued the identical call twice in a row.
data = get_clean_data('quiz_data.csv',True)
print("Summary of the Program: syde")
data = data[data.program=='syde']

# Survey columns in the order their rows appear in the figure (row 0 first).
summary_columns = (
    'happy',
    'problem_type',
    'creative',
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
    'essay',
    'architecture',
    'automotive',
    'business',
    'construction',
    'health',
    'environment',
    'manufacturing',
    'technology',
)

# The grid height follows the column list so the two cannot drift apart.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2)
fig.suptitle('syde')
for row, column in enumerate(summary_columns):
    # Left cell: how many respondents gave each answer.
    data[column].value_counts().plot(kind='bar',
                                     figsize=(14,8),
                                     title=column + "Frequency",
                                     ax=axes[row][0])
    # Right cell: the same distribution expressed as a percentage.
    (data[column].value_counts(normalize=True) * 100).plot(kind='bar',
                                                           figsize=(14,8),
                                                           title=column + "Percent",
                                                           ax=axes[row][1])

# `top` is a fraction of the figure height, so 10 pushes the upper edge of
# the grid far above the nominal canvas to give the rows room.
# NOTE(review): presumably relies on the inline backend showing the
# overflow - confirm before changing.
top = 10 # the top of the subplots of the figure
hspace = 1.2 # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
# Summary plots for the 'msci' program: one row of bar charts per survey
# column, raw answer counts on the left and percentages on the right.

# Reload the responses so the program filter below is applied to the full
# set, not to a frame an earlier cell may already have filtered. One load
# is enough; the original issued the identical call twice in a row.
data = get_clean_data('quiz_data.csv',True)
print("Summary of the Program: msci")
data = data[data.program=='msci']

# Survey columns in the order their rows appear in the figure (row 0 first).
summary_columns = (
    'happy',
    'problem_type',
    'creative',
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
    'essay',
    'architecture',
    'automotive',
    'business',
    'construction',
    'health',
    'environment',
    'manufacturing',
    'technology',
)

# The grid height follows the column list so the two cannot drift apart.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2)
fig.suptitle('msci')
for row, column in enumerate(summary_columns):
    # Left cell: how many respondents gave each answer.
    data[column].value_counts().plot(kind='bar',
                                     figsize=(14,8),
                                     title=column + "Frequency",
                                     ax=axes[row][0])
    # Right cell: the same distribution expressed as a percentage.
    (data[column].value_counts(normalize=True) * 100).plot(kind='bar',
                                                           figsize=(14,8),
                                                           title=column + "Percent",
                                                           ax=axes[row][1])

# `top` is a fraction of the figure height, so 10 pushes the upper edge of
# the grid far above the nominal canvas to give the rows room.
# NOTE(review): presumably relies on the inline backend showing the
# overflow - confirm before changing.
top = 10 # the top of the subplots of the figure
hspace = 1.2 # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
# Summary plots for the 'elec' program: one row of bar charts per survey
# column, raw answer counts on the left and percentages on the right.

# Reload the responses so the program filter below is applied to the full
# set, not to a frame an earlier cell may already have filtered. One load
# is enough; the original issued the identical call twice in a row.
data = get_clean_data('quiz_data.csv',True)
print("Summary of the Program: elec")
data = data[data.program=='elec']

# Survey columns in the order their rows appear in the figure (row 0 first).
summary_columns = (
    'happy',
    'problem_type',
    'creative',
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
    'essay',
    'architecture',
    'automotive',
    'business',
    'construction',
    'health',
    'environment',
    'manufacturing',
    'technology',
)

# The grid height follows the column list so the two cannot drift apart.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2)
fig.suptitle('elec')
for row, column in enumerate(summary_columns):
    # Left cell: how many respondents gave each answer.
    data[column].value_counts().plot(kind='bar',
                                     figsize=(14,8),
                                     title=column + "Frequency",
                                     ax=axes[row][0])
    # Right cell: the same distribution expressed as a percentage.
    (data[column].value_counts(normalize=True) * 100).plot(kind='bar',
                                                           figsize=(14,8),
                                                           title=column + "Percent",
                                                           ax=axes[row][1])

# `top` is a fraction of the figure height, so 10 pushes the upper edge of
# the grid far above the nominal canvas to give the rows room.
# NOTE(review): presumably relies on the inline backend showing the
# overflow - confirm before changing.
top = 10 # the top of the subplots of the figure
hspace = 1.2 # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
# Summary plots for the 'nano' program: one row of bar charts per survey
# column, raw answer counts on the left and percentages on the right.

# Reload the responses so the program filter below is applied to the full
# set, not to a frame an earlier cell may already have filtered. One load
# is enough; the original issued the identical call twice in a row.
data = get_clean_data('quiz_data.csv',True)
print("Summary of the Program: nano")
data = data[data.program=='nano']

# Survey columns in the order their rows appear in the figure (row 0 first).
summary_columns = (
    'happy',
    'problem_type',
    'creative',
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
    'essay',
    'architecture',
    'automotive',
    'business',
    'construction',
    'health',
    'environment',
    'manufacturing',
    'technology',
)

# The grid height follows the column list so the two cannot drift apart.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2)
fig.suptitle('nano')
for row, column in enumerate(summary_columns):
    # Left cell: how many respondents gave each answer.
    data[column].value_counts().plot(kind='bar',
                                     figsize=(14,8),
                                     title=column + "Frequency",
                                     ax=axes[row][0])
    # Right cell: the same distribution expressed as a percentage.
    (data[column].value_counts(normalize=True) * 100).plot(kind='bar',
                                                           figsize=(14,8),
                                                           title=column + "Percent",
                                                           ax=axes[row][1])

# `top` is a fraction of the figure height, so 10 pushes the upper edge of
# the grid far above the nominal canvas to give the rows room.
# NOTE(review): presumably relies on the inline backend showing the
# overflow - confirm before changing.
top = 10 # the top of the subplots of the figure
hspace = 1.2 # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
# Summary plots for the 'geo' program: one row of bar charts per survey
# column, raw answer counts on the left and percentages on the right.

# Reload the responses so the program filter below is applied to the full
# set, not to a frame an earlier cell may already have filtered. One load
# is enough; the original issued the identical call twice in a row.
data = get_clean_data('quiz_data.csv',True)
print("Summary of the Program: geo")
data = data[data.program=='geo']

# Survey columns in the order their rows appear in the figure (row 0 first).
summary_columns = (
    'happy',
    'problem_type',
    'creative',
    'outdoors',
    'career',
    'group_work',
    'liked_courses',
    'disliked_courses',
    'programming',
    'join_clubs',
    'not_clubs',
    'liked_projects',
    'disliked_projects',
    'tv_shows',
    'alternate_degree',
    'expensive_equipment',
    'essay',
    'architecture',
    'automotive',
    'business',
    'construction',
    'health',
    'environment',
    'manufacturing',
    'technology',
)

# The grid height follows the column list so the two cannot drift apart.
fig, axes = plt.subplots(nrows=len(summary_columns), ncols=2)
fig.suptitle('geo')
for row, column in enumerate(summary_columns):
    # Left cell: how many respondents gave each answer.
    data[column].value_counts().plot(kind='bar',
                                     figsize=(14,8),
                                     title=column + "Frequency",
                                     ax=axes[row][0])
    # Right cell: the same distribution expressed as a percentage.
    (data[column].value_counts(normalize=True) * 100).plot(kind='bar',
                                                           figsize=(14,8),
                                                           title=column + "Percent",
                                                           ax=axes[row][1])

# `top` is a fraction of the figure height, so 10 pushes the upper edge of
# the grid far above the nominal canvas to give the rows room.
# NOTE(review): presumably relies on the inline backend showing the
# overflow - confirm before changing.
top = 10 # the top of the subplots of the figure
hspace = 1.2 # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)
# Per-program summary for 'env': for every quiz variable, draw a frequency bar
# chart (left column) and a percentage bar chart (right column) using only the
# rows whose `program` equals 'env'.
env_summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career',
    'group_work', 'liked_courses', 'disliked_courses', 'programming',
    'join_clubs', 'not_clubs', 'liked_projects', 'disliked_projects',
    'tv_shows', 'alternate_degree', 'expensive_equipment', 'essay',
    'architecture', 'automotive', 'business', 'construction', 'health',
    'environment', 'manufacturing', 'technology',
]

# Reload once so the filter below starts from the full data set (the original
# loaded the same CSV twice in a row).
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: env")
data = data[data.program == 'env']

# One grid row per variable: [row][0] = counts, [row][1] = percentages.
fig, axes = plt.subplots(nrows=len(env_summary_columns), ncols=2)
fig.suptitle('env')
for row_idx, col_name in enumerate(env_summary_columns):
    # NOTE(review): figsize is passed together with ax exactly as before;
    # presumably pandas applies it to the shared figure -- confirm before
    # hoisting it into plt.subplots().
    data[col_name].value_counts().plot(kind='bar',
                                       figsize=(14, 8),
                                       title=col_name + "Frequency",
                                       ax=axes[row_idx][0])
    (data[col_name].value_counts(normalize=True) * 100).plot(kind='bar',
                                                             figsize=(14, 8),
                                                             title=col_name + "Percent",
                                                             ax=axes[row_idx][1])

# NOTE(review): top=10 is far outside the usual 0..1 figure-fraction range;
# presumably a deliberate way to stretch the 25-row grid vertically -- confirm.
top = 10  # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top, hspace=hspace)
# Per-program summary for 'arch-e': for every quiz variable, draw a frequency
# bar chart (left column) and a percentage bar chart (right column) using only
# the rows whose `program` equals 'arch-e'.
arch_e_summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career',
    'group_work', 'liked_courses', 'disliked_courses', 'programming',
    'join_clubs', 'not_clubs', 'liked_projects', 'disliked_projects',
    'tv_shows', 'alternate_degree', 'expensive_equipment', 'essay',
    'architecture', 'automotive', 'business', 'construction', 'health',
    'environment', 'manufacturing', 'technology',
]

# Reload once so the filter below starts from the full data set (the original
# loaded the same CSV twice in a row).
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: arch-e")
data = data[data.program == 'arch-e']

# One grid row per variable: [row][0] = counts, [row][1] = percentages.
fig, axes = plt.subplots(nrows=len(arch_e_summary_columns), ncols=2)
fig.suptitle('arch-e')
for row_idx, col_name in enumerate(arch_e_summary_columns):
    # NOTE(review): figsize is passed together with ax exactly as before;
    # presumably pandas applies it to the shared figure -- confirm before
    # hoisting it into plt.subplots().
    data[col_name].value_counts().plot(kind='bar',
                                       figsize=(14, 8),
                                       title=col_name + "Frequency",
                                       ax=axes[row_idx][0])
    (data[col_name].value_counts(normalize=True) * 100).plot(kind='bar',
                                                             figsize=(14, 8),
                                                             title=col_name + "Percent",
                                                             ax=axes[row_idx][1])

# NOTE(review): top=10 is far outside the usual 0..1 figure-fraction range;
# presumably a deliberate way to stretch the 25-row grid vertically -- confirm.
top = 10  # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top, hspace=hspace)
# Per-program summary for 'arch': for every quiz variable, draw a frequency
# bar chart (left column) and a percentage bar chart (right column) using only
# the rows whose `program` equals 'arch'.
arch_summary_columns = [
    'happy', 'problem_type', 'creative', 'outdoors', 'career',
    'group_work', 'liked_courses', 'disliked_courses', 'programming',
    'join_clubs', 'not_clubs', 'liked_projects', 'disliked_projects',
    'tv_shows', 'alternate_degree', 'expensive_equipment', 'essay',
    'architecture', 'automotive', 'business', 'construction', 'health',
    'environment', 'manufacturing', 'technology',
]

# Reload once so the filter below starts from the full data set (the original
# loaded the same CSV twice in a row).
data = get_clean_data('quiz_data.csv', True)
print("Summary of the Program: arch")
data = data[data.program == 'arch']

# One grid row per variable: [row][0] = counts, [row][1] = percentages.
fig, axes = plt.subplots(nrows=len(arch_summary_columns), ncols=2)
fig.suptitle('arch')
for row_idx, col_name in enumerate(arch_summary_columns):
    # NOTE(review): figsize is passed together with ax exactly as before;
    # presumably pandas applies it to the shared figure -- confirm before
    # hoisting it into plt.subplots().
    data[col_name].value_counts().plot(kind='bar',
                                       figsize=(14, 8),
                                       title=col_name + "Frequency",
                                       ax=axes[row_idx][0])
    (data[col_name].value_counts(normalize=True) * 100).plot(kind='bar',
                                                             figsize=(14, 8),
                                                             title=col_name + "Percent",
                                                             ax=axes[row_idx][1])

# NOTE(review): top=10 is far outside the usual 0..1 figure-fraction range;
# presumably a deliberate way to stretch the 25-row grid vertically -- confirm.
top = 10  # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top, hspace=hspace)
# "program vs. <variable>" plots: for each quiz variable, normalise its
# distribution against `program` with normalize_1_variables and draw one bar
# facet per value of that variable (col_wrap=1 stacks the facets vertically).

# Start from the unfiltered data set; the preceding summary section leaves
# `data` restricted to a single program.
data = get_clean_data('quiz_data.csv', True)

# Variables compared against `program`, in the original plotting order.
# "industry" was commented out in the original script (between "creative" and
# "outdoors") and is intentionally left out here.
program_comparison_columns = [
    'happy', 'problem_type', 'creative',
    'outdoors', 'career', 'group_work', 'liked_courses', 'disliked_courses',
    'programming', 'join_clubs', 'not_clubs', 'liked_projects',
    'disliked_projects', 'tv_shows', 'alternate_degree',
    'expensive_equipment', 'drawing', 'essay', 'architecture', 'automotive',
    'business', 'construction', 'health', 'environment', 'manufacturing',
    'technology',
]

for compared_column in program_comparison_columns:
    print("program vs. " + compared_column)
    plot_data = normalize_1_variables(data, "program", compared_column)
    g = sns.catplot(y="percent", x="program", order=data.program.unique(),
                    col=compared_column, data=plot_data, kind='bar',
                    height=10, aspect=2.5, col_wrap=1, margin_titles=True)
    g.set_xlabels('')
    g.set_ylabels('percent')
    # NOTE(review): the source had lost its indentation; only the setp call is
    # treated as the body of the per-axes loop.
    # NOTE(review): label='big' looks like it may have been meant as a font
    # size setting -- left unchanged, confirm intent.
    for ax in g.axes:
        plt.setp(ax.get_xticklabels(), visible=True, rotation=45, label='big')
    plt.subplots_adjust(hspace=0.3)
    plt.show()
import numpy as np
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
from data_load import get_encoded_data
import json
import pandas as pd
import numpy as np
from sklearn import preprocessing
import pickle
# Read the golf data with every cell kept as a string, then discard the
# columns at positions 0 and 6 before previewing the first rows.
directory = 'golf_data.csv'
df = pd.read_csv(directory, dtype=str)
unwanted_columns = df.columns[[0, 6]]
df = df.drop(unwanted_columns, axis=1)
print(df.head())
# Hand-written label -> integer code tables, one per feature. Each table also
# carries the name of the column it describes under the 'column' key.
outlook = {
    'Rainy': 1,
    'Overcast': 0,
    'Sunny': 2,
    'column': 'OUTLOOK',
}
temperature = {
    'Hot': 1,
    'Mild': 2,
    'Cool': 0,
    'column': 'TEMPERATURE',
}
humidity = {
    'High': 0,
    'Normal': 1,
    'column': 'HUMIDITY',
}
windy = {
    'FALSE': 0,
    'TRUE': 1,
    'column': 'WINDY',
}
# Label-encode every column of df in place and record, per column, the
# label -> code mapping (plus the column name under the 'column' key).
col_list = list(df.columns)
encoded_dict_list = []
for col in col_list:
    # Fit the encoder on the distinct labels of this column only.
    le = preprocessing.LabelEncoder().fit(list(df[col].unique()))
    df[col] = le.transform(list(df[col]))
    # Recover the original label for each code, in order of first appearance.
    vals = df[col].unique()
    keys = list(le.inverse_transform(vals))
    cd = dict(zip(keys, vals), column=col)
    encoded_dict_list.append(cd)
print(encoded_dict_list)
print(df.head())
# Split the encoded frame into the feature columns and the PLAY target.
x_df = df.drop(columns=["PLAY"])
y_df = df["PLAY"]
X = np.array(x_df)  # feature matrix as a plain ndarray
y = np.array(y_df)  # target vector as a plain ndarray

# Fit on the plain arrays rather than the DataFrames: prediction below is done
# with a plain array, and recent scikit-learn versions warn when a model fitted
# with feature names is later given input without them.
# NOTE(review): MultinomialNB interprets feature values as counts, while these
# are label codes -- confirm this estimator is the intended choice.
mnb = MultinomialNB()
model = mnb.fit(X, y)

# Map each feature column name to its position in the feature vector.
cat = df.drop('PLAY', axis=1)
index_dict = dict(zip(cat.columns, range(cat.shape[1])))

# Persist the fitted model (pickle protocol 2).
with open('nb_model.pkl', 'wb') as fid:
    pickle.dump(model, fid, 2)

# We need to create our feature vector of exactly the same dimension as our
# training set. To convert user input into a feature vector, we save a dict
# mapping column names to positions; later we can populate the feature vector
# for prediction using this dict.
with open('cat', 'wb') as fid:
    pickle.dump(index_dict, fid, 2)
# Example request: one label per feature, keyed by column name.
post_dict = {
    'OUTLOOK': 'Overcast',
    'TEMPERATURE': 'Cool',
    'HUMIDITY': 'Normal',
    'WINDY': 'FALSE'
}

# Build the encoded feature vector from scratch. The previous code aliased the
# label array `y` and overwrote its first four entries, which corrupted the
# training labels in place and only worked when `y` had at least four rows.
# Positions follow the OUTLOOK, TEMPERATURE, HUMIDITY, WINDY order used before.
new_vector = np.array([
    outlook[post_dict['OUTLOOK']],
    temperature[post_dict['TEMPERATURE']],
    humidity[post_dict['HUMIDITY']],
    windy[post_dict['WINDY']],
])
# Wrap as a single-row batch, the 2-D shape expected by predict().
new_vector = [new_vector]
print(new_vector)
print("Loading model")
# Use a context manager so the file handle is closed after loading (the
# previous bare open() leaked it).
# NOTE: pickle.load executes arbitrary code from the file; only load model
# files produced by a trusted source such as this script.
with open('nb_model.pkl', 'rb') as pkl_file:
    nb_model = pickle.load(pkl_file)

# new_vector holds a single row, so the result has exactly one element.
prediction = nb_model.predict(new_vector)
if prediction[0] == 0:
    response_message = 'You should not play golf today'
    rm = 'NO'
else:
    response_message = 'You could play golf today'
    rm = 'YES'
print(rm)

prediction = nb_model.predict_proba(new_vector)
print(prediction)
# First index is probability of no, second index is probability of yes
# (columns follow nb_model.classes_; assumes code 0 = no, 1 = yes -- confirm
# against the encoding printed above).
prediction = nb_model.predict_log_proba(new_vector)
print(prediction)
# Toggle Code
import ipywidgets as widgets
from IPython.display import display, HTML
# jQuery call applied to the code cells for each toggle state.
javascript_functions = {
    False: "hide()",
    True: "show()",
}
# Button caption shown for each toggle state.
button_descriptions = {
    False: "Show code",
    True: "Hide code",
}
def toggle_code(state):
    """
    Show or hide the notebook's code cells.

    Looks up the jQuery call for `state` in `javascript_functions` and emits a
    <script> element that applies it to every `div.input` element.
    """
    js_call = javascript_functions[state]
    script = '<script>$("div.input").{}</script>'.format(js_call)
    display(HTML(script))
def button_action(value):
    """
    Observer callback for the toggle button.

    Applies the new button value via toggle_code and updates the caption of
    the widget that fired the change to match.
    """
    new_state = value.new
    toggle_code(new_state)
    value.owner.description = button_descriptions[new_state]
# Start with the code hidden, then render a toggle button that flips it.
state = False
toggle_code(state)
button = widgets.ToggleButton(
    value=state,
    description=button_descriptions[state],
)
# Re-run button_action whenever the button's value trait changes.
button.observe(button_action, names="value")
display(button)